World Geospatial choropleth plot of cases
Interactive Geospatial choropleth plot of cases
Today we will make a choropleth of the countries on a world map, like the one in the article Coronavirus Map: Tracking the Global Outbreak, which looks like this -

For this we will use the JHU CSSE Dataset
#hide_output
import pandas as pd
import geopandas as gpd
import altair as alt
import numpy as np
# Turn off the Vega-Embed "actions" menu for every chart rendered below.
alt.renderers.set_embed_options(actions=False)
I made the following geojson file from the US State Department Global LSIB Polygons Detailed dataset after simplifying it, as the original has too much detail and is very large. Following is the code to do that.
#collapse
# One-off preprocessing: load the detailed LSIB layer from a local path, drop
# the bookkeeping columns, simplify the geometries and export as GeoJSON.
lsib_source = '/home/walker/my_git_repos/fastpages-covidviz/_notebooks/shapes/Global_LSIB_Polygons_Detailed/Global_LSIB_Polygons_Detailed.dbf'
unused_columns = ['OBJECTID', 'Shape_Leng', 'Shape_Le_1', 'Shape_Area']

us_st_world = gpd.read_file(lsib_source)
us_st_world = us_st_world.drop(columns=unused_columns)

# Simplification with tolerance 0.05 is what shrinks the exported file.
simplified_shapes = us_st_world.geometry.simplify(tolerance=0.05)
us_st_world["geometry"] = simplified_shapes

us_st_world.to_file("world.geojson", driver='GeoJSON')
#alt.Chart(us_st_world_).mark_geoshape(strokeWidth=1, stroke='white').encode().properties(width=1000, height=500).project('equalEarth')
# Remote inputs: the simplified world boundaries and the JHU CSSE global
# time series of confirmed cases.
world_geojson = 'https://raw.githubusercontent.com/armsp/covidviz/master/assets/world.geojson'
uri = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

us_st_world = gpd.read_file(world_geojson)
time_s_raw = pd.read_csv(uri)

# Sum every column from the fifth onwards within each 'Country/Region', so
# that countries split over several rows collapse to a single row.
# NOTE(review): the first four columns are presumably the province, country,
# latitude and longitude fields - confirm against the CSV header.
value_columns = list(time_s_raw.columns[4:])
time_s = time_s_raw.groupby('Country/Region')[value_columns].sum().reset_index()
#time_s
Let's first find out what countries in our dataset are not present in the shapefile
# Rows of the case table whose country name does not occur in the shapefile.
time_s[~time_s['Country/Region'].isin(us_st_world['COUNTRY_NA'])]
Now we need to understand that the names of the countries can differ between the two sources, so we need to figure out how to unify them before merging. For that, let's study each of the missing countries one by one like so -
# Find how the shapefile spells a country by matching the start of its name.
us_st_world.loc[us_st_world['COUNTRY_NA'].str.startswith('Antigua')]
Apply the same technique to all the countries and you'd end up with the following modifications -
# Case-table country name -> the spelling used in the shapefile's COUNTRY_NA.
country_renames = {
    'Taiwan*': 'Taiwan',
    'US': 'United States',
    'Czech Republic': 'Czechia',
    'West Bank and Gaza': 'West Bank (disp)',
    'Western Sahara': 'Western Sahara (disp)',
    'Trinidad and Tobago': 'Trinidad & Tobago',
    'Sao Tome and Principe': 'Sao Tome & Principe',
    'Saint Vincent and the Grenadines': 'St Vincent & the Grenadines',
    'Saint Lucia': 'St Lucia',
    'Saint Kitts and Nevis': 'St Kitts & Nevis',
    'North Macedonia': 'Macedonia',
    'Bahamas': 'Bahamas, The',
    'Bosnia and Herzegovina': 'Bosnia & Herzegovina',
    'Central African Republic': 'Central African Rep',
    'Eswatini': 'Swaziland',
    # Left disabled, as in the original notebook:
    # 'South Korea': 'Korea, South',
    'Congo (Kinshasa)': 'Congo, Dem Rep of the',
    'Congo (Brazzaville)': 'Congo, Rep of the',
    'Antigua and Barbuda': 'Antigua & Barbuda',
}

# Apply the renames one at a time, in place, on exact name matches only.
for source_name, shapefile_name in country_renames.items():
    matches = time_s['Country/Region'] == source_name
    time_s.loc[matches, 'Country/Region'] = shapefile_name
# collapse
# Re-run the check: countries that still have no counterpart in the shapefile.
time_s[~time_s['Country/Region'].isin(us_st_world['COUNTRY_NA'])]
Finding cases per day -
# Transpose so that each country becomes a column, then take the difference
# between consecutive rows; the first row has no predecessor and becomes NaN.
time_s_T = time_s.set_index('Country/Region').T.diff()
Averaging the cases over a week -
# hide_output
# Collects one (row name, latest average, earlier average) tuple per call.
roll_case_avg_list = []


def roll_case_avg(row, window=7, lag=14):
    """Record a row's latest and lagged rolling averages.

    The row is reversed so that its last entry comes first, a rolling mean
    over ``window`` entries is taken and floored, and the result is shifted so
    that position ``k`` holds the average of entries ``k`` .. ``k + window - 1``
    counted back from the end of the row.  The averages at positions 0 and
    ``lag`` are appended, together with ``row.name``, to the module-level
    ``roll_case_avg_list``.

    Args:
        row: pandas Series of numeric values; its ``name`` labels the result.
        window: number of entries per average (default 7).
        lag: how many entries back the second average starts (default 14).

    Returns:
        None. The result is delivered through ``roll_case_avg_list``.
    """
    newest_first = row[::-1]
    rolling_mean = newest_first.rolling(window=window).mean()
    # The rolling mean lands on the last entry of each window; shifting by
    # window - 1 moves it onto the first entry instead.
    avgs = np.floor(rolling_mean).shift(-(window - 1))
    # Rows too short for a full window already give NaN; do the same, rather
    # than raise IndexError, when the row is too short for the lookup itself.
    now = avgs.iloc[0] if len(avgs) > 0 else np.nan
    ago = avgs.iloc[lag] if len(avgs) > lag else np.nan
    roll_case_avg_list.append((row.name, now, ago))
# Back to one row per country.
p = time_s_T.T
# roll_case_avg works purely by appending to roll_case_avg_list, so use an
# explicit loop instead of DataFrame.apply: apply does not promise exactly one
# call per row (older pandas releases could call the function twice on the
# first row, which would add a duplicate entry), and its all-None result was
# being thrown away anyway.
for _, country_row in p.iterrows():
    roll_case_avg(country_row)
#roll_case_avg_list
I asked the NYT GitHub Team how they establish the category colors, and based on their input we will use the following classification -
The thresholds for that change are:
- Blue: < -15%
- Yellow: > -15% and < +15%
- Light orange: >+15% and <+100%
- Mid orange: >+100% and <+200%
- Dark red: >+200%
def categorize(x):
    """Return the trend label for one record.

    Args:
        x: mapping-like record (for example a DataFrame row) providing
            'now', 'ago' and 'diff'.

    Returns:
        'Few or no cases' when 'now' or 'ago' is zero; otherwise a label
        chosen from 'diff' expressed as a percentage of 'ago':

            below -15        -> 'Declining'
            -15 to 15        -> 'About the same'
            above 15 to 100  -> 'Growth upto 2x'
            above 100 to 200 -> 'Growth upto 3x'
            above 200        -> 'Growth more than 3x'

        None when the percentage is NaN.
    """
    if x['now'] == 0 or x['ago'] == 0:
        return 'Few or no cases'
    delta = x['diff'] / x['ago'] * 100
    # Every bound is inclusive on one side, so a change of exactly -15, 15,
    # 100 or 200 percent falls into a band instead of matching nothing.
    if delta < -15:
        return 'Declining'
    if delta <= 15:
        return 'About the same'
    if delta <= 100:
        return 'Growth upto 2x'
    if delta <= 200:
        return 'Growth upto 3x'
    if delta > 200:
        return 'Growth more than 3x'
    # Only reached when delta is NaN: every comparison above is False.
    return None
# One row per country: latest average, earlier average, their difference and
# the resulting trend category.
test2 = pd.DataFrame(roll_case_avg_list, columns=['country', 'now', 'ago'])
test2['diff'] = test2['now'].sub(test2['ago'])
test2['category'] = test2.apply(categorize, axis=1)
# Number of countries per category.
test2.groupby('category').count()

# Rename the key so it matches the shapefile, then attach the figures to every
# shape; shapes with no matching country keep empty values (left join).
test2 = test2.rename(columns={'country': 'COUNTRY_NA'})
plot2 = us_st_world.merge(test2, on='COUNTRY_NA', how='left')
plot2
Now we are ready to plot the choropleth -
# collapse
# Category labels and the fill colour paired with each, in the same order.
category_domain = ['Few or no cases', 'Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x']
category_range = ['#f2f2f2', '#badee8', '#f2df91', '#ffae43', '#ff6e0b', '#ce0a05']

category_legend = alt.Legend(
    title=None,
    orient='top',
    labelBaseline='middle',
    symbolType='square',
    columnPadding=20,
    labelFontSize=15,
    gridAlign='each',
    symbolSize=200,
)
category_color = alt.Color(
    'category:N',
    scale=alt.Scale(domain=category_domain, range=category_range),
    legend=category_legend,
)
hover_fields = [
    'COUNTRY_NA',
    alt.Tooltip('now:Q', format='.0d'),
    alt.Tooltip('ago:Q', format='.0d'),
    'category',
]

# World map coloured by category; Antarctica is filtered out of the drawing.
base = (
    alt.Chart(plot2)
    .mark_geoshape(stroke='white')
    .transform_filter(alt.datum.COUNTRY_NA != 'Antarctica')
    .encode(color=category_color, tooltip=hover_fields)
    .properties(height=800, width=1500)
    .project('equalEarth')
    .configure_view(strokeWidth=0)
)
base
We can do something even more interesting... we can make the chart interactive by highlighting the countries based on their category - Declining, About the same, Growth upto 2x, Growth upto 3x and Growth more than 3x.
#collapse
# Single selection on the 'category' field, bound to the legend; with nothing
# selected (empty='all') every country counts as selected.
selector = alt.selection_single(bind='legend', empty='all', fields=['category'])

# The legend lists only the five trend categories, while the colour scale
# still covers 'Few or no cases' as well.
legend_entries = ['Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x']
interactive_legend = alt.Legend(
    values=legend_entries,
    title=None,
    orient='top',
    labelBaseline='middle',
    symbolType='square',
    columnPadding=20,
    labelFontSize=15,
    gridAlign='each',
    symbolSize=200,
)
interactive_scale = alt.Scale(
    domain=['Few or no cases', 'Declining', 'About the same', 'Growth upto 2x', 'Growth upto 3x', 'Growth more than 3x'],
    range=['#f2f2f2', '#badee8', '#f2df91', '#ffae43', '#ff6e0b', '#ce0a05'],
)
# Selected countries are fully opaque; the rest fade to 25% opacity.
highlight = alt.condition(selector, alt.value(1), alt.value(0.25))

interactive = base.encode(
    color=alt.Color('category:N', legend=interactive_legend, scale=interactive_scale),
    opacity=highlight,
).add_selection(selector)
Now click on the legend to highlight the countries for that category.
# Display the chart whose highlighting is driven by the legend.
interactive